## ---------------------------
##
## Script name: process_fatal_shootings_Nation.R
##
## Description: Processes a previously geocoded file of fatal
## shootings with census tract information for 100 cities.
## We summarize to determine the number of fatal shootings
## for each city/tract/year combination.
##
## ---------------------------
## DEPENDENCIES

library(dplyr)
library(tidyverse)
library(sf)

# Table of the 100 cities to be included.
# NOTE: `file_locations` is not defined in this script, so it must
# already exist in the environment when this file is sourced.
city_list <- read.csv(
  paste(
    file_locations$current_fp,
    file_locations$Helpers$`City List`,
    sep = "/"
  )
)

## ---------------------------

process_fatal_shootings_nation <- function(file_locations, overwrite = FALSE) {
  #'
  #'@description Aggregates geocoded fatal shootings to the
  #'city/tract/year level for all years 2014-2020. Every tract in
  #'the processed geography file gets one row per year; tract-years
  #'without a matching shooting record are filled with zero.
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R. This function reads `current_fp`,
  #'`Nation$Shootings$raw`, `Nation$Shootings$processed` and
  #'`Nation$Geographies$processed` from it.
  #'@param overwrite boolean. Determines if already processed
  #'file should be overwritten.
  #'
  #'@return NULL (invisibly). The clean output is saved as a CSV at
  #'the processed shootings path.

  input_fp <- paste(file_locations$current_fp, file_locations$Nation$Shootings$raw, sep = "/")
  output_fp <- paste(file_locations$current_fp, file_locations$Nation$Shootings$processed, sep = "/")
  geography_fp <- paste(file_locations$current_fp, file_locations$Nation$Geographies$processed, sep = "/")

  # Skip the work when the processed file is already on disk and the
  # caller did not ask for it to be rebuilt. (The check must be on the
  # OUTPUT file; testing the input file would never trigger processing
  # under the default overwrite = FALSE.)
  if (file.exists(output_fp) && !overwrite) {
    return(invisible(NULL))
  }

  if (!file.exists(input_fp)) {
    stop("Raw fatal shootings file not found: ", input_fp, call. = FALSE)
  }

  # First and last year (inclusive) of the study window.
  year_range <- c(2014, 2020)

  # 1. Read the geocoded shootings and left-pad tract IDs to 11 characters.
  # NOTE(review): tract_2010 is parsed by read.csv before padding; if it
  # is read as a number, some IDs could be rendered in scientific
  # notation when converted to text -- confirm, or read it as character.
  shootings <- read.csv(input_fp) %>%
    mutate(tract_2010 = str_pad(tract_2010, 11, side = "left", pad = "0"))

  # 2. City/tract lookup from the processed geography file (geometry dropped).
  city_tracts <- read_sf(geography_fp) %>%
    st_drop_geometry(.) %>%
    dplyr::select(city, tract_2010)

  # 3. Summarize total fatal shootings for each tract/year. The inner
  # join keeps only shootings whose tract appears in the lookup.
  tract_year_totals <- shootings %>%
    inner_join(city_tracts, by = "tract_2010") %>%
    filter(year >= year_range[1], year <= year_range[2]) %>%
    group_by(city, tract_2010, year) %>%
    summarize(fatal_shootings = sum(killed)) %>%
    ungroup()

  # 4. Get all tract/year combinations (cross join through a constant
  # key) so tract-years with no shootings still get a row.
  study_years <- data.frame(year = seq(year_range[1], year_range[2]))
  full_tract_year <- city_tracts %>%
    mutate(dummy = 1) %>%
    inner_join(study_years %>% mutate(dummy = 1), by = "dummy") %>%
    dplyr::select(-dummy)

  # 5. Attach the totals and fill in zeroes where missing. This also
  # turns a total that is NA (sum() is called without na.rm, so any NA
  # in `killed` makes the group total NA) into 0.
  gva <- full_tract_year %>%
    left_join(tract_year_totals, by = c("city", "tract_2010", "year")) %>%
    mutate(fatal_shootings = ifelse(is.na(fatal_shootings), 0, fatal_shootings))

  write.csv(gva, output_fp, row.names = FALSE)

  invisible(NULL)
}


